Visualize state mentions in Reddit data

# Create an interactive plot that shows moving average sentiment of state mentions over time
import plotly.express as px
import numpy as np
import pandas as pd

# Load the NLP results; the code below reads the 'date', 'states_mentioned' and 'sent' columns
data = pd.read_csv('tmp/nlp_results.csv')

# Parse the dates and derive a 'YYYY-WW' label ('%U' = week of the year, weeks starting on Sunday).
# NOTE(review): 'year_week' is not used by the aggregation below — confirm whether it is needed elsewhere.
parsed_dates = pd.to_datetime(data['date'])
data['date'] = parsed_dates
data['year_week'] = parsed_dates.dt.strftime('%Y-%U')

# Mean sentiment for every (date, state) pair; the grouping key is the daily 'date', not 'year_week'
grouped_dat = (
    data.groupby(['date', 'states_mentioned'])['sent']
    .mean()
    .rename('average_sentiment')
    .reset_index()
)


def _trailing_mean(values):
    """Return the mean over the current row and up to 13 preceding rows of *values*."""
    return values.rolling(window=14, min_periods=1).mean()


# Smooth each state's series on its own; the window counts rows of that state, not calendar days
grouped_dat['moving_average'] = (
    grouped_dat.groupby(['states_mentioned'])['average_sentiment'].transform(_trailing_mean)
)
print(grouped_dat.head(10))
        date states_mentioned  average_sentiment  moving_average
0 2023-06-01          Alabama           1.000000        1.000000
1 2023-06-01           Alaska           0.333333        0.333333
2 2023-06-01          Arizona          -1.000000       -1.000000
3 2023-06-01         Arkansas           0.000000        0.000000
4 2023-06-01       California          -0.333333       -0.333333
5 2023-06-01         Colorado          -1.000000       -1.000000
6 2023-06-01      Connecticut           0.111111        0.111111
7 2023-06-01         Delaware           1.000000        1.000000
8 2023-06-01          Florida           0.000000        0.000000
9 2023-06-01          Georgia           0.166667        0.166667
# Plot interactive figure: one line chart of the moving average per state,
# arranged as facets with five columns per row.

# Colours handed to plotly express as the discrete colour sequence (50 entries).
state_palette = [
    "#F8766D", "#F37B59", "#ED8141", "#E7861B", "#E08B00", "#D89000", "#CF9400", "#C59900", "#BB9D00",
    "#AFA100", "#A3A500", "#95A900", "#85AD00", "#72B000", "#5BB300", "#39B600", "#00B820", "#00BA42",
    "#00BC59", "#00BE6C", "#00BF7D", "#00C08D", "#00C19C", "#00C1AA", "#00C0B8", "#00BFC4", "#00BDD0",
    "#00BBDB", "#00B8E5", "#00B4EF", "#00B0F6", "#00ABFD", "#00A5FF", "#529EFF", "#7997FF", "#9590FF",
    "#AC88FF", "#BF80FF", "#CF78FF", "#DC71FA", "#E76BF3", "#F066EA", "#F763E0", "#FC61D5", "#FF61C9",
    "#FF62BC", "#FF65AE", "#FF689F", "#FF6C90", "#FC717F",
]

fig = px.line(
    grouped_dat,
    x='date',
    y='moving_average',
    facet_col='states_mentioned',
    color='states_mentioned',
    facet_col_wrap=5,
    facet_row_spacing=0.04,
    facet_col_spacing=0.04,
    template='plotly_white',
    # Show the moving average with two decimals and hide the state name in the hover box
    hover_data={'moving_average': ':.2f', 'states_mentioned': False},
    color_discrete_sequence=state_palette,
)

# Keep only the text after the last "=" in each facet title
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

# Remove the per-facet axis titles; a single shared y label is added below
fig.for_each_xaxis(lambda x: x.update(title=None))
fig.for_each_yaxis(lambda y: y.update(title=None))

# Shared, rotated y-axis label placed just left of the plotting area
fig.add_annotation(
    showarrow=False,
    xanchor='center',
    xref='paper',
    x=-0.04,
    yanchor='middle',
    yref='paper',
    y=0.5,
    textangle=270,
    text='Average Sentiment',
)

# Ask for approximately 3 ticks on the y-axes of the first facet column.
# Selecting by column replaces the hard-coded yaxis, yaxis6, ..., yaxis46 list,
# so the setting still applies if the number of facet rows changes.
fig.update_yaxes(nticks=3, col=1)

fig.update_layout(width=900, height=800, showlegend=False)
fig.show()
fig.write_html("../../website-source/plots/interactive_state_sentiments.html")